home *** CD-ROM | disk | FTP | other *** search
- /*** glog 2.3 ***/
-
- /*** glog.c -- analysis tool for Unix gopherd logs ***/
-
- /***
- *** Usage : glog [-U] [-d [0,1,2,3,4]] [-p plot_number] < logfile > reportfile {2>plot_directives}
- ***/
-
- /*** Description: glog munges through a Unix gopherd log and extracts
- *** important looking statistics. It catalogs all hosts that have connected
- *** to the gopherd during the logging period and sorts them according to the
- *** number of accesses. It does the same for each directory and file accessed,
- *** and ranks them according to popularity.
- *** Depending on the argument to the -d option, glog will generate varying
- *** levels of detailed reports. The levels of detail are:
- *** 0 -- (default) Total number of hosts and connections only.
- *** 1 -- All hosts and all data sorted and unsorted.
- *** 2 -- All data accessed by each host and all hosts accessing each entry.
- *** 3 -- Print Exception/Problem Report only.
- *** 4 -- Output plot points for GNUPlot (or whatever you want to use)
- *** This mode also outputs GNUplot directives to stderr. See
- *** the last few lines of Plotter() to change the way the
- *** plots look.
- ***
- *** Numeric arguments to "-p" indicate the type of plot to be produced.
- *** Currently, -p 0 produces the same output as if -d 4 was selected.
- ***
- *** -U get rid of "useless" information. ie. only print sorted info
- ***
- *** Note: "glog" is the same as "glog -d" is the same as "glog -d 0"
- ***/
-
- /*** Version 2.3
- *** by: Andy Wick - awick@csugrad.cs.vt.edu
- *** 4/3/93
- ***
- *** - Fixed ProcessLine to handle paths with spaces, ftp, and other things.
- *** Made it more readable/understandable. "/" is now the same thing as a
- *** root connection.
- *** - Added -U option (Ugly)
- *** - Fixed the last line being processed twice
- ***
- ***/
-
- /***
- *** Version 2.2
- *** by: Chuck Shotton - cshotton@oac.hsc.uth.tmc.edu
- *** 1/4/93
- ***
- *** - Cleaned up argument parsing.
- *** - Added support for multiple plots. (No new plots were implemented though.)
- *** - Made the data field in NODE_RECs dynamic, based on the actual data size.
- ***/
-
- /***
- *** Version 2.1
- *** by: Michael Mealling - Georgia Institute of Technology
- *** Office of Information Technology
- *** michael.meallingl@oit.gatech.edu
- *** 12/29/92
- ***
- *** Added a list per each node so that we can see exactly who is accessing
- *** what. Each node contains a linked list holding who accessed it or
- *** what was accessed depending on which list you are talking about (hosts
- *** or docs). This is switchable with a -d option to glog.
- ***
- *** Also added a very ugly hack for filtering unknown log entries. Since
- *** these can be very important if your server is sick I've added these
- *** entries to an Exception/Problem Report that is printed at the end
- *** of a detail 2 report. This report can be printed alone by specifying
- *** a detail 3 report.
- ***
- *** Also added in Brygg Ulmers plotting stuff so that we can get nice
- *** plots from GNUplot. This is not very robust right now.
- ***/
-
- /***
- *** Versions 1.0
- *** by: Chuck Shotton - U of Texas Health Science Center - Houston,
- *** Office of Academic Computing
- *** cshotton@oac.hsc.uth.tmc.edu
- *** 6/17/92
- ***/
-
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef THINK_C
#include <console.h>
#endif
-
- #define GLOG_VERSION "Gopher Log Analyzer v.2.3\n"
-
- /*******************************/
- /*** The following 2 defines adjust the amount of memory allocated for
- *** A) the data contained in each information node and
- *** B) the size of input lines.
- *** They are inside conditionals to allow command-line or Make file
- *** specification of their size (i.e. cc -DNODE_DATA_SIZE=2048).
- *** WARNING: For now, they should be the same size!!! (InsertDetail, etc.
- *** need some strcpys replaced with strncpys to avoid data overrun.)
- ***/
-
- #ifndef NODE_DATA_SIZE
- #define NODE_DATA_SIZE 1024
- #endif
-
- #ifndef LINE_SIZE
- #define LINE_SIZE 1024
- #endif
-
- /*******************************/
- /***
- *** Detail list. I could have used the left half of the tree but it seemed
- *** confusing when I added the detail list. I fully intended to rewrite the
- *** sorted list but never got around to it.
- ***/
/* One entry of a detail list (per-node list or the cruft list). */
struct node_list {
    char *data;              /* heap-allocated copy of the entry text */
    int hits;                /* number of times this entry was recorded */
    struct node_list *next;  /* following entry, NULL at the tail */
};

typedef struct node_list NODE_LIST;
typedef NODE_LIST *LIST_PTR;
-
-
- /***
- *** Main tree: used both as the main tree and the sorted list
- ***/
- typedef struct node_rec {
- char *data;
- LIST_PTR llist;
- int hits;
- struct node_rec *left, *right;
- } NODE_REC;
-
- typedef NODE_REC *NODE_PTR;
-
-
- /***
- *** The cruft list is a general list for things that aren't parse-able by
- *** ProcessLine(). It is printed at the end of a detail 2 listing, and on
- *** its own by a detail 3 listing.
- *** This does seem to be an awful silly thing to have to do.
- ***/
-
- NODE_PTR hosts, docs;
- LIST_PTR cruft;
- char day[4], month[4], date[3], hours[9], year[5], pid[6],
- hostname[120], message1[25], message2[LINE_SIZE], path[LINE_SIZE];
- char start_date[20], stop_date[20];
- int detail, plot_num, USELESS = 1;
-
- /*******************************/
-
- main(argc, argv)
- int argc;
- char **argv;
- {
- char line[LINE_SIZE];
- int i;
-
- #ifdef THINK_C
- argc = ccommand(&argv);
- #endif
- /***
- *** This was nasty. It's been redone into a case. Hi Ho!
- ***/
- plot_num = -1;
- detail = 0;
- i = 1;
- while (i<argc) {
- switch (argv[i][1]) {
- case 'd':
- if (i<argc-1) {
- detail = atoi(argv[++i]);
- }
-
- if (detail>4 || detail<0) detail = 0;
-
- if (plot_num >= 0) {
- fprintf(stderr, "Warning: Details override plots. No data will be plotted!\n");
- plot_num = -1;
- }
- break;
-
- case '?':
- case 'h':
- PrintHelp();
- break;
-
- case 'p': /*custom plots*/
- if (i<argc-1) {
- plot_num = atoi(argv[++i]);
- }
- break;
-
- case 'U':
- USELESS = 0;
- break;
- default:
- PrintHelp();
- break;
- } /*switch*/
-
- i++; /*next arg...*/
-
- } /*while*/
-
- if (detail < 4 && plot_num<0) {
- printf(GLOG_VERSION);
- Initialize();
-
- fgets(line, LINE_SIZE, stdin);
- ProcessLine(line);
- sprintf(start_date, "%s %s, %s", month, date, year);
-
- while (!feof(stdin)) {
- fgets(line, LINE_SIZE, stdin);
- if (feof(stdin))
- break;
- ProcessLine(line);
- }
-
- sprintf(stop_date, "%s %s, %s", month, date, year);
-
- ShowStats();
- }
- else {
- if (detail == 4) plot_num = 0; /*handle the -d 4 case for compatibility*/
- Plotter(plot_num);
- }
- exit(0);
- }
-
- /*******************************/
-
- Initialize()
- {
- hosts = docs = NULL;
- cruft = NULL;
- }
-
- /*******************************/
-
/*
 * Write the usage summary to stderr and terminate the process with
 * exit status -1.  Does not return.
 */
int PrintHelp(void)
{
    static const char *const help_lines[] = {
        "Usage: glog [-U] [-d [0,1,2,3,4]] [-p plot_number]\n",
        " stdin should be a gopher log. stdout will be a formatted\n",
        " report or plot points. stderr will be errors or plot directives.\n",
        "-U remove useless information\n",
        "-d arguments:\n",
        " 0 - Total hosts & connections\n",
        " 1 - All hosts & data accesses, sorted & unsorted\n",
        " 2 - All data by host & vice versa\n",
        " 3 - Exception/Problem report\n",
        " 4 - Monthly usage plot (data to stdout, directives to stderr)\n"
    };
    size_t i;

    for (i = 0; i < sizeof help_lines / sizeof help_lines[0]; i++)
        fputs(help_lines[i], stderr);

    exit(-1);
}
-
- /*******************************/
/*
 * Extract the next whitespace-delimited token from temp into field.
 *
 * Leading whitespace is skipped, then characters are copied until the next
 * whitespace character or the end of the string, and field is
 * NUL-terminated.  When no token is left, field becomes the empty string.
 *
 * The caller must supply a field buffer large enough for the token plus
 * its terminator; no length is checked here.
 *
 * Returns a pointer to the first character after the token (the
 * terminating NUL when the string is exhausted).
 */
char *getf(char *temp, char *field)
{
    while (isspace((unsigned char)*temp))
        temp++;

    /* Stop at the terminator as well, or the scan runs off the string. */
    while (*temp != '\0' && !isspace((unsigned char)*temp))
        *field++ = *temp++;

    *field = '\0';

    return temp;
}
- /*******************************/
- /* Read a line from the log file, parse it up, and insert the */
- /* info into the appropriate tables. */
-
- ProcessLine(line)
- char *line;
- {
- int i;
- char *temp;
-
- /***
- *** This is a filter for Code: type messages that I cannot figure out
- *** what in the hell to do with so I'll stick 'em in cruft.
- ***/
- temp = line;
- temp = getf(temp, day);
- if (strstr("MonTueWedThuFriSatSun",day) == NULL)
- InsertDetail(&(cruft),line);
- else {
-
- /***
- *** Scan the line into it's respective parts if everything is ok (so far)
- ***/
- path[0] = '\0';
- temp = getf(temp, month);
- temp = getf(temp, date);
- temp = getf(temp, hours);
- temp = getf(temp, year);
- temp = getf(temp, pid);
- temp = getf(temp, hostname);
- if (hostname[0] == ':')
- return;
- temp = getf(temp, message1); /* : COLON */
- temp = getf(temp, message1);
- temp = getf(temp, message2);
- while(isspace(*temp))
- temp++;
- strcpy(path, temp);
- path[strlen(path)-1] = '\0';
-
-
- /***
- *** This one is for that annoying 0.0.0.1 IP address then gets stuck
- *** in the log when someone is trying to access something you ain't got
- ***/
-
- if (strcmp(hostname,"0.0.0.1")) {
-
- /***
- *** And this one goes with the aboves error message which has
- *** quotes in it that throw off the sscanf
- ***/
-
-
- if (strcmp(message1, "Root") == 0)
- {
- Insert(&hosts, hostname, "Root Connections");
- Insert(&docs, "Root Connections", hostname);
- }
- else if ((strcmp(message1, "retrieved") == 0) && (strcmp(path, "/") == 0))
- {
- Insert(&hosts, hostname, "Root Connections");
- Insert(&docs, "Root Connections", hostname);
- }
- else if (strncmp(message2, "ftp:", 4) == 0)
- {
- Insert(&hosts, hostname, message2);
- Insert(&docs, message2, hostname);
- }
- else if (strcmp(message1, "retrieved") == 0)
- {
- if (path[0] == '\0')
- return;
- Insert(&docs, path, hostname);
- Insert(&hosts, hostname, path);
- }
- else /* wasn't a retrieval */
- {
- InsertDetail(&(cruft),line);
- }
-
- } /*0.0.0.1 Match*/
- else {
- InsertDetail(&(cruft),line);
- }
- } /*Day Match*/
- }
-
- /*******************************/
- /* Insert a raw line into the cruft list. */
-
- InsertCruft(list, data)
- LIST_PTR *list;
- char *data;
- {
- LIST_PTR temp;
-
- if(*list == NULL) {
- temp = (LIST_PTR) malloc(sizeof(NODE_LIST));
- if (temp) {
- temp->next = NULL;
- if (!(temp->data = malloc(strlen(data)+1))) {
- fprintf(stderr, "ERROR: Out of memory in InsertCruft\n");
- exit(1);
- }
- else {
- strcpy(temp->data,data);
- *list = temp;
- }
- }
- else
- printf("Memory error!/n");
- }
- else
- InsertCruft(&((*list)->next),data);
- }
-
- /*******************************/
- /* Insert tree_element into the appropriate symbol table. Increment the */
- /* number of hits if that element is already present. */
- /* Insert list_element into linked list contained in the node that */
- /* tree_element was put in. */
-
- Insert(tree, tree_element, list_element)
- NODE_PTR *tree;
- char *tree_element;
- char *list_element;
- {
- NODE_PTR temp;
- int i;
- if (*tree == NULL) {
- temp = (NODE_PTR) malloc(sizeof(NODE_REC));
- if (temp) {
- temp->left = temp->right = NULL;
- temp->hits = 1;
-
- if (!(temp->data = malloc(strlen(tree_element)+1))) {
- fprintf(stderr, "ERROR: Out of memory in Insert\n");
- exit(1);
- }
- else {
- strcpy (temp->data, tree_element);
- temp->llist = NULL;
- InsertDetail(&(temp->llist), list_element);
- *tree = temp;
- }
- }
- else
- printf("Memory error\n");
- }
- else {
- i=strcmp(tree_element, (*tree)->data);
- if (i > 0)
- Insert(&((*tree)->right), tree_element, list_element);
- else if (i<0)
- Insert(&((*tree)->left), tree_element, list_element);
- else {
- (*tree)->hits += 1;
- InsertDetail(&((*tree)->llist), list_element);
- }
- }
- }
-
- /*******************************/
- int total_hits, total_nodes;
- NODE_PTR by_num;
-
- /*******************************/
- /* Dump out the contents of the given symbol table and sort by */
- /* number of "hits" on the fly. */
-
- DumpTree(tree)
- NODE_PTR tree;
- {
- if (tree == NULL)
- return;
- else {
- DumpTree(tree->left);
- if ((detail == 1) || (detail == 2)) {
- if (USELESS)
- printf("%-60.60s %5d\n", tree->data, tree->hits);
- if (detail > 1)
- DumpDetailList(tree->llist);
- }
- total_hits += tree->hits;
- total_nodes++;
- InsertByNum(tree);
- DumpTree(tree->right);
- }
- }
-
- /*******************************/
-
- DumpStats(tree)
- NODE_PTR tree;
- {
- total_hits = 0;
- total_nodes= 0;
- by_num = NULL;
- DumpTree(tree);
- }
-
- /*******************************/
- /* Turn a tree node into an element in a linked list */
-
- InsertByNum(node)
- NODE_PTR node;
- {
- NODE_PTR temp, temp2;
- if (by_num == NULL) {
- by_num = node;
- node->left == NULL;
- }
- else {
- temp = by_num;
- temp2 = temp->left;
- if (node->hits >= temp->hits) {
- node->left = temp;
- by_num = node;
- }
- else {
- while (temp2 != NULL) {
- if (node->hits > temp2->hits) {
- temp->left = node;
- node->left = temp2;
- return;
- }
- else {
- temp = temp2;
- temp2 = temp->left;
- }
- }
- temp->left = node;
- node->left = NULL;
- }
- }
- }
-
- /*******************************/
- /* Dump out the linked list contents */
-
- DumpByNum(tree)
- NODE_PTR tree;
- {
- if ((detail == 1) || (detail == 2))
- while (tree != NULL) {
- printf("%-60.60s %5d (%0.2f%%)\n", tree->data, tree->hits,
- (float)100.0*tree->hits/(float) total_hits);
- if (detail == 2)
- DumpDetailList(tree->llist);
- tree = tree->left;
- }
- }
-
- /*******************************/
- /* Dump out Detail list for each node->llist of the tree*/
-
- DumpDetailList(list)
- LIST_PTR list;
- {
- while (list != NULL) {
- printf(" %-47.47s %5d\n", list->data, list->hits);
- list = list->next;
- }
- }
-
- /*******************************/
- /* Dump out cruft list and strip \n off the end so our report */
- /* looks nice. Naughty gopherd....*/
-
- DumpCruftList(list)
- LIST_PTR list;
- {
- int linelen;
- while (list != NULL) {
- linelen=strlen(list->data);
- if (list->data[linelen - 1]=='\n')
- list->data[linelen - 1]='\0';
- printf("%-70.70s %5d\n", list->data, list->hits);
- list = list->next;
- }
- }
-
- /*******************************/
- /* Add item to detail list internal to node*/
-
- InsertDetail(list,element)
- LIST_PTR *list;
- char *element;
- {
- LIST_PTR temp, temp2;
- int i;
- if (*list == NULL) {
- temp = (LIST_PTR) malloc(sizeof(NODE_LIST));
- if (temp) {
- temp->next = NULL;
- temp->hits = 1;
- if (!(temp->data = malloc(strlen(element)+1))) {
- fprintf(stderr, "ERROR: Out of memory in InsertDetail\n");
- exit(1);
- }
- else {
- strcpy(temp->data, element);
- *list=temp;
- }
- }
- else
- printf("Memory error in InsertDetail()\n");
- }
- else {
- i=strcmp(element, (*list)->data);
- if (i > 0) {
- temp = (LIST_PTR) malloc(sizeof(NODE_LIST));
- temp->hits = 1;
- if (!(temp->data = malloc(strlen(element)+1))) {
- fprintf(stderr, "ERROR: Out of memory in InsertDetail 2\n");
- exit(1);
- }
- else {
- strcpy(temp->data, element);
- temp->next = *list;
- *list=temp;
- }
- }
- else if (i == 0) {
- ++(*list)->hits;
- }
- else {
- temp= *list;
- temp2=(*list)->next;
- while (temp2 != NULL) {
- i=strcmp(element, temp2->data);
- if (i > 0) {
- temp->next = (LIST_PTR) malloc(sizeof(NODE_LIST));
- temp->next->next=temp2;
- if (!(temp->next->data = malloc(strlen(element)+1))) {
- fprintf(stderr, "ERROR: Out of memory in InsertDetail 3\n");
- exit(1);
- }
- else {
- strcpy(temp->next->data,element);
- temp->next->hits=1;
- return;
- }
- }
- else if (i == 0) {
- ++temp2->hits;
- return;
- }
- else {
- temp=temp2;
- temp2=temp->next;
- }
- }
- temp->next = (LIST_PTR) malloc(sizeof(NODE_LIST));
- temp->next->next = NULL;
- if (!(temp->next->data = malloc(strlen(element)+1))) {
- fprintf(stderr, "ERROR: Out of memory in InsertDetail 4\n");
- exit(1);
- }
- else {
- strcpy(temp->next->data,element);
- temp->next->hits=1;
- }
- }
- }
-
- }
-
- /*********************************************************************/
- /*** Plotting routines
- ***/
- PlotByMonth0()
- {
- int ointday,ointmonth,intday,intmonth;
- int points,dtotal;
- char line[2048];
-
- intmonth=0;
- intday=0;
- fgets(line, 2048,stdin);
- sscanf(line, "%s %s %s %s %s %s %s : %[^\n]",
- day, month, date, hours, year, pid, hostname,
- message2);
- ointday=atoi(date);
- fprintf(stderr,"set xtics (\"%s/%s\" 2,",month,date);
- points = 1;
- while (!feof(stdin)) {
- fgets(line, 2048, stdin);
- sscanf(line, "%s %s %s %s %s %s %s : %[^\n]",
- day, month, date, hours, year, pid, hostname,
- message2);
- intday=atoi(date);
- if (!strcmp(month,"Jan"))
- intmonth=1;
- else
- if (!strcmp(month,"Feb"))
- intmonth=2;
- else
- if (!strcmp(month,"Mar"))
- intmonth=3;
- else
- if (!strcmp(month,"Apr"))
- intmonth=4;
- else
- if (!strcmp(month,"May"))
- intmonth=5;
- else
- if (!strcmp(month,"Jun"))
- intmonth=6;
- else
- if (!strcmp(month,"Jul"))
- intmonth=7;
- else
- if (!strcmp(month,"Aug"))
- intmonth=8;
- else
- if (!strcmp(month,"Sep"))
- intmonth=9;
- else
- if (!strcmp(month,"Oct"))
- intmonth=10;
- else
- if (!strcmp(month,"Nov"))
- intmonth=11;
- else
- if (!strcmp(month,"Dec"))
- intmonth=12;
- if (intday != ointday) {
- ++points;
- printf("%d %d\n",points,dtotal);
- if ((intday == 1) || (intday == 15) || (intmonth != ointmonth))
- fprintf(stderr,"\"%s/%s\" %d,",month,date,points);
- dtotal = 1;
- } else {
- ++dtotal;
- }
- ointday=intday;
- ointmonth=intmonth;
- }
- fprintf(stderr,"\"\" %d)\n",points);
- /***
- *** Put your own gnuplot directives here. These are the ones I like but
- *** you can (and should) edit them to make it work for you.
- ***/
- fprintf(stderr,"set data style linespoints\n");
- fprintf(stderr,"set tics out\n");
- fprintf(stderr,"set grid\n");
- fprintf(stderr,"set title \"Gopher Usage\"\n");
- }
-
-
- /*******************************/
/*
 * Dispatch the plot selected with -p (or -d 4, which main() maps to 0).
 *
 * An unknown plot number is reported on stderr and ends the process with
 * exit status 1.  Always returns 0 otherwise.
 */
int Plotter(int plot_num)
{
    switch (plot_num) {
    case 0:
        PlotByMonth0();
        break;

    /* Add additional plot routines here */

    default:
        fprintf(stderr, "ERROR: Invalid plot number (%d)\n", plot_num);
        exit(1);
    }

    return 0;
}
-
-
- /*******************************/
- /* Show all the stats gleaned from the log file */
- /* Aint this ugly? Aint I lazy? */
-
- ShowStats()
- {
- printf("Detail level:");
- if (detail == 0) printf(" Totals only\n");
- else if (detail == 1) printf(" Host and Document totals only\n");
- else if (detail == 2) printf(" Host and Document detail plus totals\n");
- else if (detail == 3) printf(" Exception/Problem Report only\n");
- printf("Report Period: %s to %s\n", start_date, stop_date);
- if ((detail != 0) && (detail !=3) && USELESS) {
- printf("=========================================================\n");
- printf("\nAll Hosts:\n-----------------------------\n");
- }
- DumpStats(hosts);
- if ((detail != 0) && (detail != 3)) {
- printf("=========================================================\n");
- printf("Most Active Hosts:\n-----------------------------\n");
- DumpByNum(by_num);
- }
- printf("------------------------\n");
- if (detail !=3) {
- printf("Total Hosts: %d\n", total_nodes);
- printf("Total Connections: %d\n", total_hits);
- }
- if ((detail != 0) && (detail !=3) && USELESS) {
- printf("=========================================================\n");
- printf("All Data Accesses:\n-----------------------------\n");
- }
- DumpStats(docs);
- if ((detail != 0) && (detail !=3)) {
- printf("=========================================================\n");
- printf("Most Popular Data:\n-----------------------------\n");
- DumpByNum(by_num);
- }
- if (detail !=3)
- printf("------------------------\n");
- if ((detail == 2) || (detail == 3)) {
- printf("=========================================================\n");
- printf("Exception/Problem Report\n");
- printf("NOTE: THESE ENTRIES MAY DENOTE A SERVER PROBLEM. THEY SHOULD BE LOOKED OVER!\n");
- DumpCruftList(cruft);
- printf("------------------------\n");
- }
- printf("Total Data Accesses: %d\n", total_hits);
- }
-